# eval_list: ordered collection of 50 string identifiers, one per line.
#
# Most entries have the dotted shape `<name>.<split>.<version>`, e.g.
# `.dev.v0`, `.dev.v1`, `.dev.v2`, `.test.v0`, `.test.v1`; two entries
# (`vigenere` and `sort-numbers`) end in `.s1.simple-v0` instead.
# Presumably these are evaluation names looked up in an external registry;
# verify against whatever consumes this list.
#
# The strings are runtime data: keep spelling, letter case and separators
# (`-` vs `_`) exactly as they are unless the registered name itself changes.
# Entries are not sorted; it is not visible from here whether order matters
# to the consumer, so preserve it when adding new names.
eval_list = [
    "chess.match.dev.v0",
    "russian_sarcasm.dev.v0",
    "corr2cause.dev.v0",
    "syllables.dev.v1",
    "crepe.dev.v2",
    "coq-proof-step-match.dev.v0",
    "Chinese_character_riddles.dev.v0",
    "nepali-numerals.dev.v0",
    "belarusian-syllable-count.dev.v0",
    "smiles_to_formula.dev.v0",
    # NOTE(review): "mandaliof" looks like an unusual spelling; left unchanged
    # because it must match the externally registered name. TODO confirm.
    "mandaliof-table.dev.v0",
    "squares-gpt.dev.v0",
    "logic-statements.dev.v0",
    "russe.test.v0",
    "vigenere.s1.simple-v0",
    "sort-numbers.s1.simple-v0",
    "matrix_mult_rows.dev.v0",
    "moral_exceptQA.test.v1",
    "music-theory-triads-identification.dev.v0",
    "building_floorplan.test.v1",
    "lat_long_identify.dev.v0",
    "backgammon-can-hit.dev.v0",
    "belarusian-rhyme.dev.v0",
    "mate-in-one.dev.v0",
    "afrikaans-lexicon.dev.v0",
    "2d_movement.dev.v0",
    "korean_spelling.dev.v0",
    "rucola.test.v0",
    "ner_finance.dev.v0",
    "logiqa-logical-reasoning-plus.dev.v0",
    "italian_big_math_expression.dev.v0",
    "medmcqa.dev.v0",
    "japanese-remote-island-to-prefecture.dev.v0",
    "finger-tracking.dev.v0",
    "forth-stack-sim.dev.v0",
    "escher-sentences.dev.v0",
    "ph-calculation.dev.v0",
    "diabetes.dev.v0",
    "simple-block-puzzles.dev.v0",
    "poker_analysis.test.v1",
    "belarusian-numerals.dev.v0",
    "cissp-study-questions.test.v1",
    "linear-equations.dev.v0",
    "first-letters.dev.v0",
    "categorize-with-distractors.dev.v0",
    "ambiguous-sentences.dev.v0",
    "css-selectors-verbal.dev.v0",
    "japanese-itpassport-exam01.dev.v0",
    "logiqa.dev.v0",
    "chinese_zodiac.dev.v0",
]
